{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## This example compares the ``SelfPacedEnsembleClassifier`` with other methods"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Single seed reused for the dataset subset and for every ensemble below.\n",
    "RANDOM_STATE = 42"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Preparation\n",
    "First, we will import the necessary packages and load the **covtype** dataset."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "from self_paced_ensemble import SelfPacedEnsembleClassifier\n",
    "from self_paced_ensemble.canonical_ensemble import *\n",
    "from self_paced_ensemble.utils import load_covtype_dataset\n",
    "from self_paced_ensemble.self_paced_ensemble.base import sort_dict_by_key\n",
    "\n",
    "from time import time\n",
    "from collections import Counter\n",
    "import matplotlib.pyplot as plt\n",
    "\n",
    "from sklearn.decomposition import KernelPCA\n",
    "from sklearn.datasets import make_classification\n",
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn.metrics import average_precision_score"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Dataset used: \t\tForest covertypes from UCI (10.0% random subset)\n",
      "Positive target:\t7\n",
      "Imbalance ratio:\t27.328\n",
      "Original training dataset shape {0: 44840, 1: 1640}\n",
      "Original test dataset shape     {0: 11211, 1: 411}\n"
     ]
    }
   ],
   "source": [
    "X_train, X_test, y_train, y_test = load_covtype_dataset(\n",
    "    subset=0.1,\n",
    "    random_state=RANDOM_STATE,\n",
    ")\n",
    "\n",
    "# Count the samples per class in each split (passed through sort_dict_by_key)\n",
    "# and report them so the class imbalance is visible before training.\n",
    "origin_distr = sort_dict_by_key(Counter(y_train))\n",
    "print('Original training dataset shape %s' % origin_distr)\n",
    "\n",
    "test_distr = sort_dict_by_key(Counter(y_test))\n",
    "print('Original test dataset shape     %s' % test_distr)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Train all ensemble classifiers"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Training  SelfPacedEnsemble   | AUPRC 0.907 | Time 0.482s\n",
      "Training     SMOTEBagging     | AUPRC 0.895 | Time 14.080s\n",
      "Training      SMOTEBoost      | AUPRC 0.479 | Time 3.110s\n",
      "Training     UnderBagging     | AUPRC 0.769 | Time 0.583s\n",
      "Training       RUSBoost       | AUPRC 0.531 | Time 0.196s\n",
      "Training    BalanceCascade    | AUPRC 0.871 | Time 0.464s\n"
     ]
    }
   ],
   "source": [
    "# Constructor arguments shared by every ensemble so the comparison is like-for-like.\n",
    "init_kwargs = {\n",
    "    'n_estimators': 10,\n",
    "    'random_state': RANDOM_STATE,\n",
    "}\n",
    "# Training data passed to each ensemble's fit().\n",
    "fit_kwargs = {\n",
    "    'X': X_train,\n",
    "    'y': y_train,\n",
    "}\n",
    "\n",
    "# Display name -> estimator class for every method under comparison.\n",
    "ensembles = {\n",
    "    'SelfPacedEnsemble': SelfPacedEnsembleClassifier,\n",
    "    'SMOTEBagging': SMOTEBaggingClassifier,\n",
    "    'SMOTEBoost': SMOTEBoostClassifier,\n",
    "    'UnderBagging': UnderBaggingClassifier,\n",
    "    'RUSBoost': RUSBoostClassifier,\n",
    "    'BalanceCascade': BalanceCascadeClassifier,\n",
    "}\n",
    "\n",
    "# Fitted classifiers keyed by display name, kept so they can be inspected later.\n",
    "fit_ensembles = {}\n",
    "for ensemble_name, ensemble_class in ensembles.items():\n",
    "    ensemble_clf = ensemble_class(**init_kwargs)\n",
    "    print('Training {:^20s} '.format(ensemble_name), end='')\n",
    "    # Time only the fit() call; scoring on the test set is excluded.\n",
    "    start_time = time()\n",
    "    ensemble_clf.fit(**fit_kwargs)\n",
    "    fit_time = time() - start_time\n",
    "    fit_ensembles[ensemble_name] = ensemble_clf\n",
    "    # Score with column 1 of predict_proba (the probability for label 1).\n",
    "    y_pred = ensemble_clf.predict_proba(X_test)[:, 1]\n",
    "    score = average_precision_score(y_test, y_pred)\n",
    "    print('| AUPRC {:.3f} | Time {:.3f}s'.format(score, fit_time))"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
